repo: Call fdatasync() before adding objects to the repo
authorColin Walters <walters@verbum.org>
Mon, 27 Aug 2012 15:53:06 +0000 (11:53 -0400)
committerColin Walters <walters@verbum.org>
Mon, 27 Aug 2012 19:35:40 +0000 (15:35 -0400)
I run builds on my laptop, but it also crashes about 1/4 of the time
while suspending.  It's definitely undesrirable to get e.g. empty
.dirtree objects because they corrupt builds.  Concretely, I was
getting empty contents committed for xorg-util-macros.

Now, we used to write out temporary files using g_file_replace() which
does a fsync() during close, but then switched to a more "manual"
g_file_append_to().

We could switch back to g_file_replace(), but the problem is, we don't
want to call fsync() on temporary files in the case where we already
have the object.  Attempting to add an object we already have is a
*very* common case.

This is both the old and new code sequence for the case where an
object is already stored:

open(temp, O_WRONLY)
write() write() write()
close()
lstat(objects/3a/9fe332...) = 0
unlink(temp)

In the *new* code, here's the case where an object *isn't* stored:

open(temp, O_WRONLY)
write() write() write()
close()
lstat(objects/3a/9fe332...) = -1
open(temp, O_RDONLY)
fdatasync()
close()
rename(temp, objects/3a/9fe332)

Compare with the *old* code path for when an object isn't stored:

open(temp, O_WRONLY)
write() write() write()
close()
lstat(objects/3a/9fe332...) = -1
link(temp, objects/3a/9fe332)
unlink(temp)

The problem with this is we really need to fdatasync().  Also doing
just rename() instead of the weird link()/unlink() helps us express to
the filesystem that we want atomic semantics.  For example, BTRFS has
special handling for rename().

src/libostree/ostree-repo.c
src/libotutil/ot-unix-utils.c
src/libotutil/ot-unix-utils.h

index c74c095a508cab3f545b7067621b6ef4c227ee53..ab41e30dabe79ca992ef9201a74c4ec696bc56d2 100644 (file)
@@ -30,6 +30,7 @@
 
 #include <gio/gunixoutputstream.h>
 #include <gio/gunixinputstream.h>
+#include <glib/gstdio.h>
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -797,10 +798,46 @@ ostree_repo_get_archive_content_path (OstreeRepo    *self,
   return g_file_resolve_relative_path (self->repodir, path);
 }
 
+/**
+ * ensure_file_data_synced:
+ *
+ * Ensure that in case of a power cut, these files have the data we
+ * want.   See http://lwn.net/Articles/322823/
+ */
+static gboolean
+ensure_file_data_synced (GFile         *file,
+                         GCancellable  *cancellable,
+                         GError       **error)
+{
+  gboolean ret = FALSE;
+  int fd = -1;
+
+  fd = g_open (ot_gfile_get_path_cached (file), O_RDONLY | O_NOATIME | O_CLOEXEC | O_LARGEFILE, 0);
+  if (fd < 0)
+    {
+      ot_util_set_error_from_errno (error, errno);
+      goto out;
+    }
+
+  if (!ot_unix_fdatasync (fd, error))
+    goto out;
+
+  if (!ot_unix_close (fd, error))
+    goto out;
+  fd = -1;
+
+  ret = TRUE;
+ out:
+  if (fd != -1)
+    (void) close (fd);
+  return ret;
+}
+
 static gboolean
 commit_loose_object_impl (OstreeRepo        *self,
                           GFile             *tempfile_path,
                           GFile             *dest,
+                          gboolean           is_regular,
                           GCancellable      *cancellable,
                           GError           **error)
 {
@@ -810,8 +847,14 @@ commit_loose_object_impl (OstreeRepo        *self,
   parent = g_file_get_parent (dest);
   if (!ot_gfile_ensure_directory (parent, FALSE, error))
     goto out;
+
+  if (is_regular)
+    {
+      if (!ensure_file_data_synced (tempfile_path, cancellable, error))
+        goto out;
+    }
   
-  if (link (ot_gfile_get_path_cached (tempfile_path), ot_gfile_get_path_cached (dest)) < 0)
+  if (rename (ot_gfile_get_path_cached (tempfile_path), ot_gfile_get_path_cached (dest)) < 0)
     {
       if (errno != EEXIST)
         {
@@ -822,7 +865,6 @@ commit_loose_object_impl (OstreeRepo        *self,
         }
     }
 
-  (void) unlink (ot_gfile_get_path_cached (tempfile_path));
   ret = TRUE;
  out:
   return ret;
@@ -833,6 +875,7 @@ commit_loose_object_trusted (OstreeRepo        *self,
                              const char        *checksum,
                              OstreeObjectType   objtype,
                              GFile             *tempfile_path,
+                             gboolean           is_regular,
                              GCancellable      *cancellable,
                              GError           **error)
 {
@@ -841,7 +884,7 @@ commit_loose_object_trusted (OstreeRepo        *self,
 
   dest_file = ostree_repo_get_object_path (self, checksum, objtype);
 
-  if (!commit_loose_object_impl (self, tempfile_path, dest_file,
+  if (!commit_loose_object_impl (self, tempfile_path, dest_file, is_regular,
                                  cancellable, error))
     goto out;
 
@@ -879,6 +922,7 @@ stage_object_internal (OstreeRepo         *self,
   GChecksum *checksum = NULL;
   gboolean staged_raw_file = FALSE;
   gboolean staged_archive_file = FALSE;
+  gboolean temp_file_is_regular;
 
   if (out_csum)
     {
@@ -900,6 +944,8 @@ stage_object_internal (OstreeRepo         *self,
                                         cancellable, error))
         goto out;
 
+      temp_file_is_regular = g_file_info_get_file_type (file_info) == G_FILE_TYPE_REGULAR;
+
       if (ostree_repo_get_mode (self) == OSTREE_REPO_MODE_BARE)
         {
           if (!ostree_create_temp_file_from_input (self->tmp_dir,
@@ -973,8 +1019,9 @@ stage_object_internal (OstreeRepo         *self,
                                                &temp_file,
                                                cancellable, error))
         goto out;
+      temp_file_is_regular = TRUE;
     }
-          
+
   if (!checksum)
     actual_checksum = expected_checksum;
   else
@@ -1012,6 +1059,7 @@ stage_object_internal (OstreeRepo         *self,
           ot_lobj GInputStream *file_input = NULL;
           ot_lobj GFileInfo *file_info = NULL;
           ot_lvariant GVariant *xattrs = NULL;
+          gboolean is_regular;
               
           if (!ostree_content_file_parse (temp_file, FALSE, &file_input,
                                           &file_info, &xattrs,
@@ -1025,8 +1073,10 @@ stage_object_internal (OstreeRepo         *self,
                                                    cancellable, error))
             goto out;
 
+          is_regular = g_file_info_get_file_type (file_info) == G_FILE_TYPE_REGULAR;
+
           if (!commit_loose_object_trusted (self, actual_checksum, objtype, 
-                                            raw_temp_file, cancellable, error))
+                                            raw_temp_file, is_regular, cancellable, error))
             goto out;
           g_clear_object (&raw_temp_file);
         }
@@ -1039,13 +1089,13 @@ stage_object_internal (OstreeRepo         *self,
 
               archive_content_dest = ostree_repo_get_archive_content_path (self, actual_checksum);
                                                                    
-              if (!commit_loose_object_impl (self, raw_temp_file, archive_content_dest,
+              if (!commit_loose_object_impl (self, raw_temp_file, archive_content_dest, TRUE,
                                              cancellable, error))
                 goto out;
               g_clear_object (&raw_temp_file);
             }
-          if (!commit_loose_object_trusted (self, actual_checksum, objtype, 
-                                            temp_file, cancellable, error))
+          if (!commit_loose_object_trusted (self, actual_checksum, objtype, temp_file, temp_file_is_regular,
+                                            cancellable, error))
             goto out;
           g_clear_object (&temp_file);
         }
index ee8f0397624d0e06862a9ce4a9ae19293e4dc565..600082d6ebab2e02726c52e2249c379b13d9ed82 100644 (file)
@@ -31,6 +31,7 @@
 #include <sys/types.h>
 #include <unistd.h>
 #include <stdlib.h>
+#include <errno.h>
 #include <dirent.h>
 
 gboolean
@@ -184,3 +185,43 @@ ot_util_fatal_gerror (GError *error)
   g_assert (error != NULL);
   ot_util_fatal_literal (error->message);
 }
+
+/**
+ * ot_unix_fdatasync:
+ *
+ * Like fdatasync(), but uses #GError, and handles EINTR.
+ */
+gboolean
+ot_unix_fdatasync (int fd, GError **error)
+{
+  int result;
+  do
+    result = fdatasync (fd);
+  while (G_UNLIKELY (result != 0 && errno == EINTR));
+  if (result != 0)
+    {
+      ot_util_set_error_from_errno (error, errno);
+      return FALSE;
+    }
+  return TRUE;
+}
+
+/**
+ * ot_unix_close:
+ *
+ * Like close(), but uses #GError, and handles EINTR.
+ */
+gboolean
+ot_unix_close (int fd, GError **error)
+{
+  int result;
+  do
+    result = close (fd);
+  while (G_UNLIKELY (result != 0 && errno == EINTR));
+  if (result != 0)
+    {
+      ot_util_set_error_from_errno (error, errno);
+      return FALSE;
+    }
+  return TRUE;
+}
index 58dd59b5a0a103a17c70e71f68ae6659e602ed85..2022dea89f070ea2e3229ab50266590f52584f9e 100644 (file)
@@ -49,6 +49,16 @@ gboolean ot_util_path_split_validate (const char *path, GPtrArray **out_componen
 
 void ot_util_set_error_from_errno (GError **error, gint saved_errno);
 
+gboolean ot_unix_open (const char *path, 
+                       int         flags,
+                       mode_t      mode,
+                       int        *out_fd,
+                       GError    **error);
+
+gboolean ot_unix_fdatasync (int fd, GError **error);
+
+gboolean ot_unix_close (int fd, GError **error);
+
 G_END_DECLS
 
 #endif